"""
爬虫流程：
一、数据来源分析
    - 需求分析
        （我们要什么样的数据）

    - 接口分析
        （我们从哪儿去获得想要的数据）

二、爬虫代码实现
    - 发送请求
    - 接收数据
    - 解析数据
    - 保存数据
"""

import time
import requests
from parsel import Selector

# Module-level accumulator shared by every HunanCuisine instance:
# parse_data() extends it and main() returns it.
DELICIOUS_FOOD_URL_LIST = []


class HunanCuisine:
    """Scraper for one Hunan-cuisine (湘菜) listing page on meishichina.com.

    Fetches the page at ``url`` and appends the extracted values to the
    module-level ``DELICIOUS_FOOD_URL_LIST`` accumulator.
    """

    def __init__(self, url):
        """Store the target URL plus the headers/cookies sent with the request.

        :param url: full URL of one listing page to scrape.
        """
        self.url = url
        self.headers = {
            'Referer': 'https://home.meishichina.com/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
        }
        self.cookies = {
            'msc-user-sign-mark': '1',
            'Hm_lvt_fb9cd9dcdda23cee0c7357db9be24acb': '1707983734',
            'PHPSESSID': 'l0h15ttbvu8f3einjnpvuumk22',
            'pid': '58961707983693321',
            'Hm_lpvt_fb9cd9dcdda23cee0c7357db9be24acb': '1707983905',
        }
        # Raw HTML of the fetched page; filled in by get_data().
        self.html = ""

    def get_data(self):
        """Download the listing page into ``self.html`` (decoded as UTF-8).

        :raises requests.HTTPError: if the server answers with an error status.
        """
        # timeout keeps one hung connection from stalling the entire crawl
        response = requests.get(
            self.url, headers=self.headers, cookies=self.cookies, timeout=10
        )
        # fail loudly on 4xx/5xx instead of silently parsing an error page
        response.raise_for_status()
        response.encoding = 'utf-8'
        self.html = response.text
        time.sleep(1)  # politeness delay between requests

    def parse_data(self):
        """Extract values from ``self.html`` into DELICIOUS_FOOD_URL_LIST.

        NOTE(review): despite the accumulator's name, the CSS selector pulls
        the ``title`` attribute (the recipe titles), not the ``href`` URLs —
        confirm which is actually intended.
        """
        selector = Selector(text=self.html)
        titles = selector.css("div.detail h2 a::attr(title)").getall()
        # .extend() mutates the module-level list in place, so no `global`
        # declaration is needed (we never rebind the name).
        DELICIOUS_FOOD_URL_LIST.extend(titles)

    def run(self):
        """Fetch then parse: the full scrape cycle for this page."""
        self.get_data()
        self.parse_data()


def main(start_page=13, end_page=99):
    """Crawl listing pages ``start_page`` .. ``end_page - 1`` sequentially.

    The defaults preserve the original hard-coded ``range(13, 99)``; pass
    other values to crawl a different page window.

    :param start_page: first page number to fetch (inclusive).
    :param end_page: page number to stop before (exclusive).
    :return: the module-level DELICIOUS_FOOD_URL_LIST accumulator.
    """
    for page in range(start_page, end_page):
        # progress log: current page plus everything collected so far
        print('当前页数 =', page, DELICIOUS_FOOD_URL_LIST)
        url = f"https://home.meishichina.com/recipe/xiangcai/page/{page}/"
        # NOTE(review): this sleep is in addition to the one inside
        # HunanCuisine.get_data(), so each page waits ~2s total.
        time.sleep(1)
        spider = HunanCuisine(url)
        spider.run()

    return DELICIOUS_FOOD_URL_LIST

if __name__ == '__main__':
    # Fetch page 1 on its own, then hand the remaining pages to main().
    first_page = "https://home.meishichina.com/recipe/xiangcai/page/1/"
    crawler = HunanCuisine(first_page)
    crawler.run()
    # Show what page 1 yielded before the long crawl starts.
    print(DELICIOUS_FOOD_URL_LIST)
    main()
